#August 2025

# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/anscombe.txt'

# Read the CSV file into a DataFrame
anscombe <- read.table(url, header = TRUE, sep = "", strip.white = TRUE)
attach(anscombe)

#Figure 3.1 on page 46
par(mfrow=c(2,2))
plot(x1,y1,xlim=c(4,20),ylim=c(3,14),main="Data Set 1")
abline(lsfit(x1,y1))
plot(x2,y2,xlim=c(4,20),ylim=c(3,14),main="Data Set 2")
abline(lsfit(x2,y2))
plot(x3,y3,xlim=c(4,20),ylim=c(3,14),main="Data Set 3")
abline(lsfit(x3,y3))
plot(x4,y4,xlim=c(4,20),ylim=c(3,14),main="Data Set 4")
abline(lsfit(x4,y4))

#Regression output on page 47
m1 <- lm(y1~x1)
summary(m1)
m2 <- lm(y2~x2)
summary(m2)
m3 <- lm(y3~x3)
summary(m3)
m4 <- lm(y4~x4)
summary(m4)

#Figure 3.2 on page 48
par(mfrow=c(2,2))
plot(x1,m1$residuals,ylab="Residuals",xlim=c(4,20),ylim=c(-3.5,3.5),main="Data Set 1")
plot(x2,m2$residuals,ylab="Residuals",xlim=c(4,20),ylim=c(-3.5,3.5),main="Data Set 2")
plot(x3,m3$residuals,ylab="Residuals",xlim=c(4,20),ylim=c(-3.5,3.5),main="Data Set 3")
plot(x4,m4$residuals,ylab="Residuals",xlim=c(4,20),ylim=c(-3.5,3.5),main="Data Set 4")

#Figure 3.3 on page 50
par(mfrow=c(1,2))
plot(x2,y2,,ylim=c(3,10))
abline(lsfit(x2,y2))
plot(x2,m2$residuals,ylab="Residuals",ylim=c(-2,2),main="Data Set 2")

detach(anscombe)


# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/huber.txt'
rm(x)
# Read the CSV file into a DataFrame
huber <- read.table(url, header = TRUE)

attach(huber)

#Regression output on page 54
mBad <- lm(YBad~x)
summary(mBad)
mGood <- lm(YGood~x)
summary(mGood)

#Figure 3.7 on page 55
par(mfrow=c(1,2))
plot(x,YBad,ylim=c(-12,3))
abline(lsfit(x,YBad))
plot(x,YGood,ylim=c(-12,3))
abline(lsfit(x,YGood))

#Leverage values in Table 3.3 on page 57
# Extract leverage values
hBad <- lm.influence(mBad)$hat
hGood <- lm.influence(mGood)$hat

# Create tables
tableBad <- data.frame(
  Case = 1:length(hBad),
  x = mBad$model[[2]],           # the predictor in the model
  Leverage = round(hBad, 4)
)

tableGood <- data.frame(
  Case = 1:length(hGood),
  x = mGood$model[[2]],
  Leverage = round(hGood, 4)
)

# View the tables
print(tableBad)
print(tableGood)


#Regression output and Figure 3.8 on page 58
xq <- x^2
mBadq <- lm(YBad~x+I(x^2))
summary(mBadq)
xx <- c(-4:10)
yy <- summary(mBadq)$coef[1] + summary(mBadq)$coef[2]*xx + summary(mBadq)$coef[3]*xx^2
par(mfrow=c(1,1))
plot(xx,yy,ylim=c(-3,3),type="l",ylab="YBad",xlab="x")
points(x,YBad)

detach(huber)

# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/BONDS.TXT'

# Read the CSV file into a DataFrame
bonds <- read.table(url, header = TRUE)

attach(bonds)

#Figure 3.9 on page 63
par(mfrow=c(1,1))
plot(CouponRate,BidPrice,xlab="Coupon Rate (%)", ylab="Bid Price ($)",ylim=c(85,120),xlim=c(2,14))
abline(lsfit(CouponRate,BidPrice))

#Regression output on page 63 
m1 <- lm(BidPrice~CouponRate)
summary(m1)

#95% confidence intervals on page 63
round(confint(m1,level=0.95),3)

# Table 3.4 on page 62 — clean output with proper headers
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
residual1 <- m1$residuals

table3.4 <- data.frame(
  Case = Case,
  CouponRate = CouponRate,
  BidPrice = BidPrice,
  Leverage = round(leverage1, 3),
  Residual = round(residual1, 3),
  StdResidual = round(StanRes1, 3)
)

# View the table
print(table3.4)

#Figure 3.10 on page 64
plot(CouponRate,StanRes1,xlab="Coupon Rate (%)", ylab="Standardized Residuals",xlim=c(2,14))
abline(h=2,lty=2)
abline(h=-2,lty=2)
identify(CouponRate,StanRes1,Case)
# Click near a point to identify its Case. 
# This continues until you select "Stop" after clicking the right mouse button.

#Regression output on page 66
m2 <- update(m1, subset=(1:35)[-c(4,13,35)])
summary(m2)

#Figure 3.11 on page 65
plot(CouponRate[-c(4,13,35)],BidPrice[-c(4,13,35)],xlab="Coupon Rate (%)", ylab="Bid Price ($)",ylim=c(85,120),xlim=c(2,14),main="Regular Bonds")
abline(m2)

#Figure 3.12 on page 67
StanRes2 <- rstandard(m2)
plot(CouponRate[-c(4,13,35)],StanRes2,xlab="Coupon Rate (%)", ylab="Standardized Residuals",xlim=c(2,14),main="Regular Bonds")
abline(h=2,lty=2)
abline(h=-2,lty=2)

#Figure 3.13 on page 68
cd1 <- cooks.distance(m1)
plot(CouponRate,cd1,xlab="Coupon Rate (%)", ylab="Cook's Distance")
abline(h=4/(35-2),lty=2)
identify(CouponRate,cd1,Case)
# Click near a point to identify its Case. 
# This continues until you select "Stop" after clicking the right mouse button.

detach(bonds)


# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/production.txt'

# Read the CSV file into a DataFrame
production <- read.table(url, header = TRUE)

attach(production)

m1 <- lm(RunTime~RunSize)

#Figure 3.14 on page 70
par(mfrow=c(2,2))
plot(m1)

detach(production)


# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/cleaning.txt'

# Read the CSV file into a DataFrame
cleaning <- read.table(url, header = TRUE)

attach(cleaning)

#Figure 3.15 on page 71
par(mfrow=c(1,1))
plot(Crews,Rooms,xlab="Number of Crews",ylab="Number of Rooms Cleaned")
abline(lsfit(Crews,Rooms))

#Regression output on pages 72 and 73
m1 <- lm(Rooms~Crews)
summary(m1)
predict(m1,newdata=data.frame(Crews=c(4,16)),interval="prediction",level=0.95)

#Figure 3.16 on page 73
StanRes1 <- rstandard(m1)
plot(Crews,StanRes1,xlab="Number of Crews", ylab="Standardized Residuals")

#Figure 3.17 on page 74
sabs <- sqrt(abs(StanRes1))
plot(Crews,sabs,xlab="Number of Crews", ylab="Square Root(|Standardized Residuals|)")
abline(lsfit(Crews,sabs))

#Figure 3.18 on page 75
par(mfrow=c(2,2))
plot(m1)

#Figure 3.19 on page 75
sqrt(tapply(Rooms,Crews,var))
sds <- c(3.000000,4.966555,4.690416,6.642665,7.927123,7.28991,12.000463)
xx <- c(2,4,6,8,10,12,16)
par(mfrow=c(1,1))
plot(xx,sds,xlab="Number of Crews", ylab="Standard deviation(Rooms Cleaned)")
abline(lsfit(xx,sds))

#Regression output on page 77
sqrtcrews <- sqrt(Crews)
sqrtrooms <- sqrt(Rooms)
m2 <- lm(sqrtrooms~sqrtcrews)
summary(m2)
predict(m2,newdata=data.frame(sqrtcrews=c(2,4)),interval="prediction",level=0.95)

#Figure 3.20 on page 78
par(mfrow=c(1,2))
plot(sqrt(Crews),sqrt(Rooms),xlab="Square Root(Number of Crews)",ylab="Square Root(Number of Rooms Cleaned)")
abline(lsfit(sqrt(Crews),sqrt(Rooms)))
StanRes2 <- rstandard(m2)
plot(sqrtcrews,StanRes2,xlab="Square Root(Number of Crews)", ylab="Standardized Residuals")

#Figure 3.21 on page 78
par(mfrow=c(2,2))
plot(m2)

detach(cleaning)


# URL of the CSV file
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/confood1.txt'

# Read the CSV file into a DataFrame
confood1 <- read.table(url, header = TRUE)

attach(confood1)

#Figure 3.22 on page 80
par(mfrow=c(1,1))
plot(Price,Sales)
abline(lsfit(Price,Sales))

#Figure 3.23 on page 81
plot(log(Price),log(Sales),xlab="log(Price)",ylab="log(Sales)")
abline(lsfit(log(Price),log(Sales)))

#Regression output on page 82
m1 <- lm(log(Sales)~log(Price))
summary(m1)

#Figure 3.24 on page 82
StanRes1 <- rstandard(m1)
plot(log(Price),StanRes1,xlab="log(Price)", ylab="Standardized Residuals")

detach(confood1)


url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/responsetransformation.txt'
rm(x)
responsetransformation <- read.table(url, header = TRUE)
attach(responsetransformation)

#Figure 3.25 on page 84
plot(x,y)

#Figure 3.26 on page 85
plot(x,y)
m1 <- lm(y~x)
par(mfrow=c(1,2))
StanRes1 <- rstandard(m1)
absrtsr1 <- sqrt(abs(StanRes1))
plot(x,StanRes1,ylab="Standardized Residuals")
plot(x,absrtsr1,ylab="Square Root(|Standardized Residuals|)")

#Figure 3.27 on page 86
par(mfrow=c(3,2))
plot(density(y,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="y")
rug(y)
boxplot(y,ylab="Y")
qqnorm(y, ylab = "Y")
qqline(y, lty = 2, col=2)
sj <- bw.SJ(x,lower = 0.05, upper = 100)
plot(density(x,bw=sj,kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="x")
rug(x)
boxplot(x,ylab="x")
qqnorm(x, ylab = "x")
qqline(x, lty = 2, col=2)


#Figure 3.28 on page 87
# Load required packages
install.packages("alr4")
install.packages("car")

library(alr4)
library(car)

m1 <- lm(y~x) 

par(mfrow=c(1,1))
inverseResponsePlot(m1)

#Figure 3.29 on page 88
inverseResponsePlot(m1,lam=c(-1,-0.5, -0.33, -0.25, 0, 0.25, 0.33, 0.5,1))
lambda <- c(-1,-0.5, -0.33, -0.25, 0, 0.25, 0.33, 0.5,1)
RSS <- c(46673.9,24090.7,15264.2,11637.1,3583.8,440,266,880.2,7136.9)
plot(lambda,RSS,type="l",ylab=expression(RSS(lambda)),xlab=expression(lambda))

#Figure 3.30 on page 92
m1 <- lm(y~x) 

library(MASS)
par(mfrow=c(1,2))
boxcox(m1,lambda=seq(0.28,0.39,length=20))
boxcox(m1,lambda=seq(0.325,0.34,length=20))

#Regression output & Figure 3.31 on page 93
ty <- y^(1/3)
par(mfrow=c(2,2))
sj <- bw.SJ(ty,lower = 0.05, upper = 100)
plot(density(ty,bw=sj,kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab=expression(Y^(1/3)))
rug(ty)
boxplot(ty,ylab=expression(Y^(1/3)))
qqnorm(ty, ylab = expression(Y^(1/3)))
qqline(ty, lty = 2, col=2)
m2 <- lm(ty~x)
plot(x,ty,ylab=expression(Y^(1/3)))
abline(m2)
summary(m2)

detach(responsetransformation)

# Load required packages
install.packages("alr4")
install.packages("car")
library(alr4)
library(car)

# Load the dataset from alr4
data("salarygov")

# Attach the dataset for easy reference
attach(salarygov)

# Figure 3.32 on page 96
m1 <- lm(MaxSalary ~ Score)

# Set up the plotting layout
par(mfrow = c(2, 2))

# Plot the original data and regression line
plot(Score, MaxSalary)
abline(m1, lty = 2, col = 2)

# Standardized residuals and plots
StanRes1 <- rstandard(m1)
absrtsr1 <- sqrt(abs(StanRes1))
plot(Score, StanRes1, ylab = "Standardized Residuals")
plot(Score, absrtsr1, ylab = "Square Root(|Standardized Residuals|)")
abline(lsfit(Score, absrtsr1), lty = 2, col = 2)

# Output transformation summary using powerTransform from the car package
summary(tranxy <- powerTransform(cbind(MaxSalary, Score) ~ 1))

#Figure 3.33 on page 97
par(mfrow=c(3,2))
plot(density(MaxSalary,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="MaxSalary")
rug(MaxSalary)
boxplot(MaxSalary,ylab="MaxSalary")
qqnorm(MaxSalary, ylab = "MaxSalary")
qqline(MaxSalary, lty = 2, col=2)
plot(density(Score,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="Score")
rug(Score)
boxplot(Score,ylab="Score")
qqnorm(Score, ylab = "Score")
qqline(Score, lty = 2, col=2)

#Figure 3.34 on page 97
par(mfrow=c(1,1))
plot(sqrt(Score),log(MaxSalary),xlab=expression(sqrt(Score)))
abline(lsfit(sqrt(Score),log(MaxSalary)),lty=2,col=2)

#Figure 3.35 on page 98
par(mfrow=c(3,2))
plot(density(log(MaxSalary),bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="log(MaxSalary)")
rug(log(MaxSalary))
boxplot(log(MaxSalary),ylab="log(MaxSalary)")
qqnorm(log(MaxSalary), ylab = "log(MaxSalary)")
qqline(log(MaxSalary), lty = 2, col=2)
sj <- bw.SJ(sqrt(Score),lower = 0.05, upper = 100)
plot(density(sqrt(Score),bw=sj,kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab=expression(sqrt(Score)))
rug(sqrt(Score))
boxplot(sqrt(Score),ylab=expression(sqrt(Score)))
qqnorm(sqrt(Score), ylab=expression(sqrt(Score)))
qqline(sqrt(Score), lty = 2, col=2)

#Figure 3.36 on page 99
m2 <- lm(log(MaxSalary)~sqrt(Score))
par(mfrow=c(1,2))
StanRes2 <- rstandard(m2)
absrtsr2 <- sqrt(abs(StanRes2))
plot(sqrt(Score),StanRes2,ylab="Standardized Residuals",xlab=expression(sqrt(Score)))
plot(sqrt(Score),absrtsr2,ylab="Square Root(|Standardized Residuals|)",xlab=expression(sqrt(Score)))
abline(lsfit(sqrt(Score),absrtsr2),lty=2,col=2)

#R output on page 99
summary(tranx <- powerTransform(Score))

#Figure 3.37 on page 100
m3 <- lm(MaxSalary~sqrt(Score))

library(alr4)
library(car)

par(mfrow=c(1,1))
inverseResponsePlot(m3)

#Figure 3.38 on page 101
par(mfrow=c(2,2))
plot(density(MaxSalary^-0.25,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab=expression(MaxSalary^-0.25))
rug(MaxSalary^-0.25)
boxplot(MaxSalary^-0.25,ylab=expression(MaxSalary^-0.25))
qqnorm(MaxSalary^-0.25,ylab=expression(MaxSalary^-0.25))
qqline(MaxSalary^-0.25,lty=2, col=2)

#Figure 3.39 on page 102
par(mfrow=c(2,2))
plot(sqrt(Score),MaxSalary^-0.25,xlab=expression(sqrt(Score)),ylab=expression(MaxSalary^-0.25))
abline(lsfit(sqrt(Score),MaxSalary^-0.25),lty=2,col=2)
m3 <- lm(MaxSalary^-0.25~sqrt(Score))
StanRes3 <- rstandard(m3)
absrtsr3 <- sqrt(abs(StanRes3))
plot(sqrt(Score),StanRes3,ylab="Standardized Residuals",xlab=expression(sqrt(Score)))
plot(sqrt(Score),absrtsr3,ylab="Square Root(|Standardized Residuals|)",xlab=expression(sqrt(Score)))
abline(lsfit(sqrt(Score),absrtsr3),lty=2,col=2)

detach(salarygov)

#################EXERCISES

#Exercise 3.5.1

url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/airfares.txt'

airfares <- read.table(url, header = TRUE)

attach(airfares)

#R output on page 104
m1 <- lm(Fare~Distance)
summary(m1)

#Figure 3.41 on page 104
par(mfrow=c(1,2))
plot(Distance,Fare)
abline(lsfit(Distance,Fare))
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
residual1 <- m1$residuals
plot(Distance,StanRes1, ylab="Standardized Residuals")
abline(h=2,lty=2)
abline(h=-2,lty=2)

detach(airfares)

#Exercise 3.5.4
# Load the dataset
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/glakes.txt'
glakes <- read.table(url, header = TRUE)

attach(glakes)

#R output on page 107
m1 <- lm(Time~Tonnage)
summary(m1)

#Figure 3.42 on page 106
par(mfrow=c(2,2))
plot(Tonnage,Time)
abline(lsfit(Tonnage,Time))
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
absrtsr1 <- sqrt(abs(StanRes1))
residual1 <- m1$residuals
plot(Tonnage,StanRes1, ylab="Standardized Residuals")
abline(h=2,lty=2)
abline(h=-2,lty=2)
plot(Tonnage,absrtsr1,ylab="Square Root(|Standardized Residuals|)")
abline(lsfit(Tonnage,absrtsr1),lty=2,col=1)
qqnorm(StanRes1, ylab = "Standardized Residuals")
qqline(StanRes1, lty = 2, col=1)

#Figure 3.43 on page 107
par(mfrow=c(3,2))
plot(density(Time,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="Time")
rug(Time)
boxplot(Time,ylab="Time")
qqnorm(Time, ylab = "Time")
qqline(Time, lty = 2, col=1)
sj <- bw.SJ(Tonnage,lower = 0.1, upper = 1000)
plot(density(Tonnage,bw=sj,kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="Tonnage")
rug(Tonnage)
boxplot(Tonnage,ylab="Tonnage")
qqnorm(Tonnage, ylab = "Tonnage")
qqline(Tonnage, lty = 2, col=1)


#R output on page 108
# Load required packages
library(alr4)
library(car)

# Output transformation summary using powerTransform from the car package
summary(tranxy <- powerTransform(cbind(Time,Tonnage)~1))

#R output on page 108
m2 <- lm(log(Time)~I(Tonnage^0.25))
summary(m2)

#Figure 3.44 on page 108
tTime <- log(Time)
tTonnage <- Tonnage^0.25
par(mfrow=c(2,2))
plot(tTonnage,tTime,xlab=expression(Tonnage^0.25),ylab="log(Time)")
abline(lsfit(tTonnage,tTime))
leverage2 <- hatvalues(m2)
StanRes2 <- rstandard(m2)
absrtsr2 <- sqrt(abs(StanRes2))
residual2 <- m2$residuals
plot(tTonnage,StanRes2, ylab="Standardized Residuals",xlab=expression(Tonnage^0.25))
abline(h=2,lty=2)
abline(h=-2,lty=2)
plot(tTonnage,absrtsr2,ylab="Square Root(|Standardized Residuals|)",xlab=expression(Tonnage^0.25))
abline(lsfit(tTonnage,absrtsr2),lty=2,col=1)
qqnorm(StanRes2, ylab = "Standardized Residuals")
qqline(StanRes2, lty = 2, col=1)

#Figure 3.45 on page 109
par(mfrow=c(3,2))
plot(density(tTime,bw="SJ",kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab="log(Time)")
rug(tTime)
boxplot(tTime,ylab="log(Time)")
qqnorm(tTime, ylab = "log(Time)")
qqline(tTime, lty = 2, col=1)
sj <- bw.SJ(tTonnage,lower = 0.1, upper = 1000)
plot(density(tTonnage,bw=sj,kern="gaussian"),type="l",
main="Gaussian kernel density estimate",xlab=expression(Tonnage^0.25))
rug(tTonnage)
boxplot(tTonnage,ylab=expression(Tonnage^0.25))
qqnorm(tTonnage, ylab=expression(Tonnage^0.25))
qqline(tTonnage, lty = 2, col=1)

detach(glakes)

#Exercise 3.5.5
# Load the dataset
url <- 'https://gattonweb.uky.edu/faculty/sheather/book/docs/datasets/cars04.csv'
cars04 <- read.csv(url, header = TRUE)

attach(cars04)

#Output from R on pages 110 and 111
m1 <- lm(SuggestedRetailPrice~DealerCost)
summary(m1)

#Figure 3.46 on page 110
par(mfrow=c(2,2))
plot(DealerCost,SuggestedRetailPrice)
abline(lsfit(DealerCost,SuggestedRetailPrice),lty = 2, col=1)
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
absrtsr1 <- sqrt(abs(StanRes1))
residual1 <- m1$residuals
plot(DealerCost,StanRes1, ylab="Standardized Residuals")
abline(h=2,lty=2)
abline(h=-2,lty=2)
plot(DealerCost,absrtsr1,ylab="Square Root(|Standardized Residuals|)")
abline(lsfit(DealerCost,absrtsr1),lty=2,col=1)
qqnorm(StanRes1, ylab = "Standardized Residuals")
qqline(StanRes1, lty = 2, col=1)

#Output from R on page 111
m2 <- lm(log(SuggestedRetailPrice)~log(DealerCost))
summary(m2)

#Figure 3.47 on page 111
par(mfrow=c(2,2))
plot(log(DealerCost),log(SuggestedRetailPrice))
abline(lsfit(log(DealerCost),log(SuggestedRetailPrice)),lty = 1, col=1)
leverage2 <- hatvalues(m2)
StanRes2 <- rstandard(m2)
absrtsr2 <- sqrt(abs(StanRes2))
residual2 <- m2$residuals
plot(log(DealerCost),StanRes2, ylab="Standardized Residuals")
abline(h=2,lty=2)
abline(h=-2,lty=2)
plot(log(DealerCost),absrtsr2,ylab="Square Root(|Standardized Residuals|)")
abline(lsfit(log(DealerCost),absrtsr2),lty=2,col=1)
qqnorm(StanRes2, ylab = "Standardized Residuals")
qqline(StanRes2, lty = 2, col=1)

detach(cars04)

#Exercise 3.5.6 based on a different set of generated data

library(alr4)
library(car)

n<-500
x <- runif(n,0,1)^3
e <- rnorm(n,0,0.1)
y <- exp(+2.5 + 1*x + e)

#Figure 3.48 on page 112
m1 <- lm(y~x)
par(mfrow=c(1,1))
inverseResponsePlot(m1)




